Purpose: Run genetic stock identification analysis and generate various output files.

5.1 Data

Read in formatted genotype data, see BaselineDataTesting.R

Code
# formatted reference and mixture genotypes
ref_input <- read_csv("/Users/jeffbaldock/Library/CloudStorage/GoogleDrive-jbaldock@uwyo.edu/Shared drives/wyo-coop-baldock/UWyoming/Snake River Cutthroat/Analyses/Snake River GSI Quarto/Baseline Testing/UpperSnakeRiver_GTseq_InputData_NoSibs_clean_baseline.csv")
mix_input <- read_csv("/Users/jeffbaldock/Library/CloudStorage/GoogleDrive-jbaldock@uwyo.edu/Shared drives/wyo-coop-baldock/UWyoming/Snake River Cutthroat/Analyses/Snake River GSI Quarto/Baseline Testing/UpperSnakeRiver_GTseq_InputData_NoSibs_clean_mixture.csv") %>% filter(collection != "snake_jldcattlemens")
ref_input <- ref_input %>% mutate_if(is.double, as.integer) # make sure everything is an interger
mix_input <- mix_input %>% mutate_if(is.double, as.integer) # make sure everything is an interger

Get metadata

Code
# get metadata
ids <- read_csv("/Users/jeffbaldock/Library/CloudStorage/GoogleDrive-jbaldock@uwyo.edu/Shared drives/wyo-coop-baldock/UWyoming/Snake River Cutthroat/Analyses/Snake River GSI Quarto/LabFieldIDs.csv")
data <- read_csv("/Users/jeffbaldock/Library/CloudStorage/GoogleDrive-jbaldock@uwyo.edu/Shared drives/wyo-coop-baldock/UWyoming/Snake River Cutthroat/Analyses/Snake River GSI Quarto/Snake_GSI_field data_2020-2022_GenOnly_DropNoDrop_090823edit.csv")
metadata <- left_join(ids, data) %>% 
  select(indiv, CORRgenID, year, site, TL.mm, TL.in, FL.mm, weight.g, weight.lbs, collection.type, sizecat) %>% 
  filter(collection.type == "mixture", site != "snake_jldcattlemens")
metadata[duplicated(metadata$indiv),] # check for duplicates
# A tibble: 0 × 11
# ℹ 11 variables: indiv <chr>, CORRgenID <chr>, year <dbl>, site <chr>,
#   TL.mm <dbl>, TL.in <dbl>, FL.mm <dbl>, weight.g <dbl>, weight.lbs <dbl>,
#   collection.type <chr>, sizecat <dbl>

Get self-assignment test results:

Code
self_test_exam <- read_csv("/Users/jeffbaldock/Library/CloudStorage/GoogleDrive-jbaldock@uwyo.edu/Shared drives/wyo-coop-baldock/UWyoming/Snake River Cutthroat/Analyses/Snake River GSI Quarto/Baseline Testing/Self Assignment/self_assign_results_individuals.csv")

Get groundwater metrics, GPS locations, and repunit-section distances for plotting

Code
# groundwater metrics
gwmet <- read_csv("/Users/jeffbaldock/Library/CloudStorage/GoogleDrive-jbaldock@uwyo.edu/Shared drives/wyo-coop-baldock/UWyoming/Snake River Cutthroat/Analyses/Snake River GSI Quarto/Landscape Covariates/Groundwater/GroundwaterMetrics_raw_RepUnits.csv") %>% 
  rename(repunit = site) %>% 
  mutate(logarea = log(areasqkm), loggwi = log(gwi_iew05km)) %>% 
  mutate(z_logarea = as.numeric(scale(logarea)), z_loggwi = as.numeric(scale(loggwi))) %>%
  arrange(repunit)

gps <- read_csv("/Users/jeffbaldock/Library/CloudStorage/GoogleDrive-jbaldock@uwyo.edu/Shared drives/wyo-coop-baldock/UWyoming/Snake River Cutthroat/Analyses/Snake River GSI Quarto/Landscape Covariates/Location Data/RepUnit_LatLong.csv")

dist <- read_csv("/Users/jeffbaldock/Library/CloudStorage/GoogleDrive-jbaldock@uwyo.edu/Shared drives/wyo-coop-baldock/UWyoming/Snake River Cutthroat/Analyses/Snake River GSI Quarto/Landscape Covariates/Flowline Distance/SnakeRiverSections_RepUnits_FlowlineDistance.csv") 

Generate data files year-aware GSI.

Code
# mixture global
# mix_input_global <- mix_input %>% mutate(collection = "snake")
# unique(mix_input_global$collection)

# mixtures by year
metayr <- metadata %>% select(indiv, year)
mix_input_yr <- mix_input %>% left_join(metayr) %>% mutate(collection = paste(collection, year, sep = "_")) %>% select(-year)

See unique collections

Code
unique(mix_input_yr$collection)
 [1] "snake_wilsonsouthpark_2020"  "snake_deadmansmoose_2020"   
 [3] "snake_southparkastoria_2020" "snake_astoriawesttable_2020"
 [5] "snake_moosewilson_2020"      "snake_deadmansmoose_2021"   
 [7] "snake_pacificdeadmans_2021"  "snake_wilsonsouthpark_2021" 
 [9] "snake_southparkastoria_2021" "snake_moosewilson_2021"     
[11] "snake_astoriawesttable_2021" "snake_moosewilson_2022"     
[13] "snake_astoriawesttable_2022" "snake_pacificdeadmans_2022" 
[15] "snake_wilsonsouthpark_2022"  "snake_deadmansmoose_2022"   
[17] "snake_southparkastoria_2022"

5.2 Run GSI - by Section and Year

Run GSI with collections specific to sections and years. Mthod “PB” provides bootstrapped corrected mixing proportions.

Code
gsi_sectyear <- infer_mixture(reference = ref_input, mixture = mix_input_yr, gen_start_col = 5, method = "PB")
saveRDS(gsi_sectyear, "/Users/jeffbaldock/Library/CloudStorage/GoogleDrive-jbaldock@uwyo.edu/Shared drives/wyo-coop-baldock/UWyoming/Snake River Cutthroat/Analyses/Snake River GSI Quarto/GSI Analysis/By Section and Year/UpperSnake_GSI_SectionYear_output.RDS")
write_csv(gsi_sectyear$bootstrapped_proportions, "/Users/jeffbaldock/Library/CloudStorage/GoogleDrive-jbaldock@uwyo.edu/Shared drives/wyo-coop-baldock/UWyoming/Snake River Cutthroat/Analyses/Snake River GSI Quarto/GSI Analysis/By Section and Year/UpperSnake_GSI_SectionYear_BootstrappedProportions.csv")
gsi_sectyear <- readRDS("/Users/jeffbaldock/Library/CloudStorage/GoogleDrive-jbaldock@uwyo.edu/Shared drives/wyo-coop-baldock/UWyoming/Snake River Cutthroat/Analyses/Snake River GSI Quarto/GSI Analysis/By Section and Year/UpperSnake_GSI_SectionYear_output.RDS")

5.2.1 GSI Diagnostics

5.2.1.1 MAP

Get the maximum-a-posteriori population for each individual. Do mainstem individuals assign to a source population (reporting group) with high probability? Do certain reporting groups have many individuals that assign with low certainty? (if so, may suggest missing reporting groups from baseline)

Code
map_rows <- gsi_sectyear$indiv_posteriors %>%
  group_by(indiv) %>%
  top_n(1, PofZ) %>%
  ungroup()
ss <- map_rows %>% group_by(repunit) %>% summarise(n = n())

View the distribution of MAP values by reporting group:

Code
ggplot(map_rows, aes(x = repunit, y = PofZ)) +
  geom_boxplot(aes(colour = repunit)) +
  geom_text(data = ss, mapping = aes(y = 1.025, label = n), angle = 90, hjust = 0, vjust = 0.5, size = 3) + 
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 9, vjust = 0.5)) +
  ylim(c(NA, 1.05)) +
  guides(colour = FALSE) +
  xlab("Reporting group") + ylab("Maximum likelihood")

5.2.1.2 Z Scores

Plot density of observed z-scores (blue) against expected (black)…if all contributing pops are represented in the baseline, distributions should be similar.

Generate values from a normal distribution

Code
normo <- tibble(z_score = rnorm(1e06))

Test for differences between observed and random normal distributions using a Kolmogorov-Smirnov test. p << 0.05 indicates there are problematic individuals (distributions are not the same).

Code
ks.test(normo, map_rows$z_score)

    Asymptotic two-sample Kolmogorov-Smirnov test

data:  normo and map_rows$z_score
D = 0.082052, p-value = 3.174e-09
alternative hypothesis: two-sided

Plot observed (black) and expected (blue) distributions of z-scores

Code
ggplot(map_rows, aes(x = z_score)) +
  geom_density(colour = "blue") +
  geom_density(data = normo, colour = "black")

View distribution of z-scores by population

Code
ggplot(map_rows, aes(x = repunit, y = z_score)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

Following Bowersox, Hargrove, et al 2023 (NAJFM), individual likely originated from an unsampled pop if +/- 2 SDs from the mean z-score. Find problematic individuals

Code
meanz <- mean(map_rows$z_score)
zproblems <- map_rows %>% filter(z_score > meanz+2 | z_score < meanz-2)
datatable(zproblems)

Fish originating from pops outside our baseline represent ~6.57% of all fish sampled in the mixture

Code
dim(zproblems)[1] / dim(map_rows)[1] 
[1] 0.06569343

Which section of the mainstem Snake River were these problematic individuals sampled from?

Code
datatable(zproblems %>% mutate(snake_section = str_sub(mixture_collection, 1, str_length(mixture_collection)-5)) %>% group_by(snake_section) %>% summarize(count = n()))

Which reporting group are the problematic individuals assigning to?

Code
datatable(zproblems %>% group_by(repunit) %>% summarize(count = n()))

Other papers compare z-scores from observed mixture data (red) to z-scores generated from the self-assignment tests (blue):

Code
par(mfrow = c(6,7), mar = c(2,2.5,3,0.1))
sites <- sort(unique(map_rows$repunit))
for (i in 1:length(sites)) {
  mx <- map_rows %>% filter(repunit == sites[i])
  bl <- self_test_exam %>% filter(repunit == sites[i])
  ztib <- tibble(type = c(rep("mx", times = dim(mx)[1]), rep("bl", times = dim(bl)[1])), z = c(mx$z_score, bl$z_score))
  boxplot(z ~ type, ztib, col = c("blue", "red"), main = sites[i])
}

5.2.2 BS mixing proportions

Inspect bootstrap-corrected mixing proportions, because from self-assignment tests and mixture simulations, we know raw mixing proportions will be somewhat biased.

Summarize raw and bootstrap corrected mixing proportions by reporting group, calculate mean residual for each collection.

Code
tt <- gsi_sectyear$mixing_proportions %>% 
  group_by(mixture_collection, repunit) %>% summarize(pi = sum(pi)) %>% ungroup() %>% 
  left_join(gsi_sectyear$bootstrapped_proportions) %>%
  mutate(diff = bs_corrected_repunit_ppn - pi,
         section = str_sub(mixture_collection, 7, -6),
         year = str_sub(mixture_collection, -4))
tt %>% group_by(mixture_collection) %>% summarize(mr = sum(bs_corrected_repunit_ppn - pi) / length(unique(repunit)))
# A tibble: 17 × 2
   mixture_collection                 mr
   <chr>                           <dbl>
 1 snake_astoriawesttable_2020  4.66e-19
 2 snake_astoriawesttable_2021  8.33e-18
 3 snake_astoriawesttable_2022  4.04e-18
 4 snake_deadmansmoose_2020    -2.13e-17
 5 snake_deadmansmoose_2021     8.48e-18
 6 snake_deadmansmoose_2022     3.56e-18
 7 snake_moosewilson_2020       1.28e-18
 8 snake_moosewilson_2021      -3.78e-18
 9 snake_moosewilson_2022       7.91e-18
10 snake_pacificdeadmans_2021   8.14e-18
11 snake_pacificdeadmans_2022   2.43e-19
12 snake_southparkastoria_2020  3.64e-18
13 snake_southparkastoria_2021 -1.25e-17
14 snake_southparkastoria_2022 -3.34e-18
15 snake_wilsonsouthpark_2020   1.23e-18
16 snake_wilsonsouthpark_2021   1.53e-17
17 snake_wilsonsouthpark_2022  -5.69e-18

Calculate Pearson’s r correlation coefficient between raw and bootstrap-corrected mixing proportions:

Code
tt <- tt %>% mutate(sectionID = recode(section, 
                                 "pacificdeadmans" = "A",
                                 "deadmansmoose" = "B",
                                 "moosewilson" = "C",
                                 "wilsonsouthpark" = "D",
                                 "southparkastoria" = "E",
                                 "astoriawesttable" = "F"),
                    year = as.numeric(year))
tt2 <- add_row(tt, year = 2020, sectionID = "A")
cor.test(tt2$pi, tt2$bs_corrected_repunit_ppn)

    Pearson's product-moment correlation

data:  tt2$pi and tt2$bs_corrected_repunit_ppn
t = 209.79, df = 882, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.9887422 0.9913441
sample estimates:
      cor 
0.9901281 

Plot raw vs bootstrap-corrected mixing proportions. Red line = 1:1.

Code
tt2 %>% ggplot() + 
  geom_point(aes(x = pi, y = bs_corrected_repunit_ppn)) + 
  geom_abline(intercept = 0, slope = 1, col = "red") + 
  facet_grid(factor(sectionID) ~ year) + 
  xlab("Uncorrected mixing proportion") + ylab("Bootstrap-corrected mixing proportion") + theme_bw()

Plot bootstrap-corrected mixing proportions by reporting unit, Snake River section, and year.

Code
tt %>% 
  add_row(repunit = unique(tt$repunit), bs_corrected_repunit_ppn = rep(0, 52), section = rep("pacificdeadmans", 52), year = rep(2020, 52)) %>% 
  mutate(year = as.factor(year)) %>%
  ggplot() +
  geom_bar(aes(x = repunit, y = bs_corrected_repunit_ppn, fill = year), stat = "identity", position = "dodge", color = "black") +
  facet_wrap(~ factor(section, levels = c("pacificdeadmans", "deadmansmoose", "moosewilson", "wilsonsouthpark", "southparkastoria", "astoriawesttable")), nrow = 6) +
  xlab("Reporting Group") + ylab("Corrected Mixture Proportion") + theme_bw() +
  theme_bw() + theme(axis.text.x = element_text(angle = 90, vjust = 0.5))

Produce the same plot as above, except bar height represents mean bootstrap-corrected mixture proportion and error bars represent minimum and maximum (over 3 years of sampling).

Code
mylabs <- tibble(sectionID = c("A", "B", "C", "D", "E", "F"),
                 mylab = c("A", "B", "C", "D", "E", "F"))

tt %>% 
  group_by(sectionID, repunit) %>% 
  summarize(minppn = min(bs_corrected_repunit_ppn),
            meanppn = mean(bs_corrected_repunit_ppn),
            maxppn = max(bs_corrected_repunit_ppn)) %>% 
  ungroup() %>%
  left_join(gwmet) %>% left_join(gps) %>% 
  ggplot(aes(x = reorder(repunit, -lat), y = meanppn)) +
  geom_bar(aes(x = reorder(repunit, -lat), y = meanppn, fill = gwi_iew05km), stat = "identity", color = "black", width = 1) +
  geom_errorbar(aes(ymin = minppn, ymax = maxppn), width = 0) +
  facet_wrap(~ factor(sectionID, levels = c("A", "B", "C", "D", "E", "F")), nrow = 6) +
  xlab("Reporting group") + ylab("Corrected mixture proportion") + theme_bw() +
  theme_bw() + theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) + 
  theme(legend.position = "top", strip.text.x = element_blank(), legend.margin = unit(c(0,0,0,0), "pt"), legend.box.spacing = unit(5, "pt")) + 
  labs(fill = "Groundwater index") +
  scale_fill_viridis_c(direction = -1, guide = guide_colorbar(frame.colour = "black", ticks.colour = "black"), alpha = 0.8) +
  scale_x_discrete(labels = c(1:52)) + 
  geom_text(data = mylabs, aes(x = -Inf, y = Inf, label = mylab, hjust = -1, vjust = 1.5), size = 5)

Summarize among-year means as pie charts.

Code
mypies <- tt %>% 
  group_by(sectionID, repunit) %>% 
  summarize(minppn = min(bs_corrected_repunit_ppn),
            meanppn = mean(bs_corrected_repunit_ppn),
            maxppn = max(bs_corrected_repunit_ppn)) %>% 
  ungroup() %>%
  left_join(gwmet) %>% left_join(gps) %>% 
  mutate(sectionID = factor(sectionID, levels = c("A", "B", "C", "D", "E", "F"))) %>%
  arrange(sectionID, desc(gwi_iew05km)) %>%
  ggplot(aes(x = "", y = meanppn, fill = gwi_iew05km)) +
  geom_col() +
  coord_polar("y", start = 0) +
  facet_wrap(~ factor(sectionID, levels = c("A", "B", "C", "D", "E", "F"))) +
  theme_bw() + theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) + 
  theme(legend.position = "none", strip.text.x = element_blank(), axis.title.y = element_blank(), panel.border = element_blank(), panel.grid=element_blank(), axis.ticks = element_blank(), axis.text.x = element_blank()) + 
  scale_fill_viridis_c(direction = -1, alpha = 0.8) 
mypies

5.2.3 Coarse dispersal distance

Use Kolmogorov-Smirnof tests to test for differences in the distribution of “dispersal distances” among years. No differences.

Note that the distribution of approx. dispersal distance is robust to choice of maximum assignment probability. Min, max, and median dispersal distance is very similar for fish with PofZ > 0.7 and for those with PofZ > 0.95.

Code
ddd <- map_rows %>% 
  mutate(section = substr(mixture_collection, 1, nchar(mixture_collection)-5),
         year = as.numeric(substr(mixture_collection, nchar(mixture_collection)-3, nchar(mixture_collection)))) %>% 
  filter(PofZ >= 0.7) %>% left_join(dist)

# pairwise KS tests, do the distributions of distances differ among years?
ks.test(unlist(ddd %>% filter(year == 2020) %>% select(distkm)), unlist(ddd %>% filter(year == 2021) %>% select(distkm)))

    Asymptotic two-sample Kolmogorov-Smirnov test

data:  unlist(ddd %>% filter(year == 2020) %>% select(distkm)) and unlist(ddd %>% filter(year == 2021) %>% select(distkm))
D = 0.08159, p-value = 0.1265
alternative hypothesis: two-sided
Code
ks.test(unlist(ddd %>% filter(year == 2020) %>% select(distkm)), unlist(ddd %>% filter(year == 2022) %>% select(distkm)))

    Asymptotic two-sample Kolmogorov-Smirnov test

data:  unlist(ddd %>% filter(year == 2020) %>% select(distkm)) and unlist(ddd %>% filter(year == 2022) %>% select(distkm))
D = 0.084836, p-value = 0.1058
alternative hypothesis: two-sided
Code
ks.test(unlist(ddd %>% filter(year == 2021) %>% select(distkm)), unlist(ddd %>% filter(year == 2022) %>% select(distkm)))

    Asymptotic two-sample Kolmogorov-Smirnov test

data:  unlist(ddd %>% filter(year == 2021) %>% select(distkm)) and unlist(ddd %>% filter(year == 2022) %>% select(distkm))
D = 0.076703, p-value = 0.1695
alternative hypothesis: two-sided

Minimum, maximum, and median dispersal distances (km)

Code
min(ddd$distkm)
[1] 0.8656264
Code
max(ddd$distkm)
[1] 134.1021
Code
median(ddd$distkm)
[1] 23.86171

View distribution. Big bump at 25-30 is largely driven by Fish Creek fish.

Code
par(mar = c(4,4,1,1), mgp = c(2.5,1,0))
hist(ddd$distkm, breaks = 30, main = "", xlab = "Approximate dispersal distance (km)", ylab = "Number of fish")
legend("topright", border = NA, bty = "n", legend = c(paste("Median = ", round(median(ddd$distkm)), " km", sep = ""), paste("Maximum = ", round(max(ddd$distkm)), " km", sep = "")))

5.3 Run GSI - by Section

Run GSI with collections specific to sections, but agnostic to years. Mthod “PB” provides bootstrapped corrected mixing proportions.

Code
gsi_section <- infer_mixture(reference = ref_input, mixture = mix_input, gen_start_col = 5, method = "PB")
saveRDS(gsi_section, "/Users/jeffbaldock/Library/CloudStorage/GoogleDrive-jbaldock@uwyo.edu/Shared drives/wyo-coop-baldock/UWyoming/Snake River Cutthroat/Analyses/Snake River GSI Quarto/GSI Analysis/By Section/UpperSnake_GSI_Section_output.RDS")
write_csv(gsi_section$bootstrapped_proportions, "/Users/jeffbaldock/Library/CloudStorage/GoogleDrive-jbaldock@uwyo.edu/Shared drives/wyo-coop-baldock/UWyoming/Snake River Cutthroat/Analyses/Snake River GSI Quarto/GSI Analysis/By Section/UpperSnake_GSI_SectionYear_BootstrappedProportions.csv")
gsi_section <- readRDS("/Users/jeffbaldock/Library/CloudStorage/GoogleDrive-jbaldock@uwyo.edu/Shared drives/wyo-coop-baldock/UWyoming/Snake River Cutthroat/Analyses/Snake River GSI Quarto/GSI Analysis/By Section/UpperSnake_GSI_Section_output.RDS")

5.3.1 BS mixing proportions

Inspect bootstrap-corrected mixing proportions, because from self-assignment tests and mixture simulations, we know raw mixing proportions will be somewhat biased.

Summarize raw and bootstrap corrected mixing proportions by reporting group, calculate mean residual for each collection.

Code
tt <- gsi_section$mixing_proportions %>% 
  group_by(mixture_collection, repunit) %>% summarize(pi = sum(pi)) %>% ungroup() %>% 
  left_join(gsi_section$bootstrapped_proportions) %>%
  mutate(diff = bs_corrected_repunit_ppn - pi)
tt %>% group_by(mixture_collection) %>% summarize(mr = sum(bs_corrected_repunit_ppn - pi) / length(unique(repunit)))
# A tibble: 6 × 2
  mixture_collection            mr
  <chr>                      <dbl>
1 snake_astoriawesttable  5.74e-18
2 snake_deadmansmoose     1.73e-17
3 snake_moosewilson       4.12e-18
4 snake_pacificdeadmans   6.62e-18
5 snake_southparkastoria -9.91e-18
6 snake_wilsonsouthpark   2.67e-18

Plot raw vs bootstrap-corrected mixing proportions. Red line = 1:1.

Code
tt %>% ggplot() + 
  geom_point(aes(x = pi, y = bs_corrected_repunit_ppn)) + 
  geom_abline(intercept = 0, slope = 1, col = "red") + 
  facet_wrap(~ factor(mixture_collection, levels = c("snake_pacificdeadmans", "snake_deadmansmoose", "snake_moosewilson", "snake_wilsonsouthpark", "snake_southparkastoria", "snake_astoriawesttable"))) + 
  xlab("Mixture Proportion") + ylab("Corrected Mixture Proportion") + theme_bw()

View (bootstrap corrected) reporting group contributions by river section as bar plots

Code
gsi_section$bootstrapped_proportions %>% 
  ggplot() +
  geom_bar(aes(x = repunit, y = bs_corrected_repunit_ppn), stat = "identity", fill = "grey70", color = "black") +
  facet_wrap(~ factor(mixture_collection, levels = c("snake_pacificdeadmans", "snake_deadmansmoose", "snake_moosewilson", "snake_wilsonsouthpark", "snake_southparkastoria", "snake_astoriawesttable")), nrow = 6) +
  xlab("Reporting Group") + ylab("Corrected Mixture Proportion") + theme_bw() +
  theme_bw() + theme(axis.text.x = element_text(angle = 90, vjust = 0.5))

Plot as above, but sort reporting groups by latitude (north to south) and color bars by groundwater influence:

Code
# join bootstrapped proportions with groundwater and gps location data
gsi_dat <- gsi_section$bootstrapped_proportions %>% left_join(gwmet) %>% left_join(gps)
# new names
sectnames <- c("snake_pacificdeadmans" = "A. Pacific - Deadman's", 
               "snake_deadmansmoose" = "B. Deadman's - Moose", 
               "snake_moosewilson" = "C. Moose - Wilson", 
               "snake_wilsonsouthpark" = "D. Wilson - South Park", 
               "snake_southparkastoria" = "E. South Park - Astoria", 
               "snake_astoriawesttable" = "F. Astoria - West Table")
mylabs <- tibble(mixture_collection = c("snake_pacificdeadmans", "snake_deadmansmoose", "snake_moosewilson", "snake_wilsonsouthpark", "snake_southparkastoria", "snake_astoriawesttable"),
                 mylab = c("A", "B", "C", "D", "E", "F"))
# plot
gsi_dat %>% 
  ggplot() +
  geom_bar(aes(x = reorder(repunit, -lat), y = bs_corrected_repunit_ppn, fill = gwi_iew05km), stat = "identity", color = "black") +
  facet_wrap(~ factor(mixture_collection, levels = c("snake_pacificdeadmans", "snake_deadmansmoose", "snake_moosewilson", "snake_wilsonsouthpark", "snake_southparkastoria", "snake_astoriawesttable")), nrow = 6, labeller = as_labeller(sectnames)) +
  xlab("Reporting Group") + ylab("Corrected Mixture Proportion") + theme_bw() +
  theme_bw() + theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) + 
  theme(legend.position = "top", strip.text.x = element_blank()) + labs(fill = "Groundwater index") +
  scale_fill_viridis_c(direction = -1, guide = guide_colorbar(frame.colour = "black", ticks.colour = "black"), alpha = 0.8) +
  scale_x_discrete(labels = c(1:52)) + 
  geom_text(data = mylabs, aes(x = Inf, y = Inf, label = mylab, hjust = 1.5, vjust = 1.5), size = 5)